Goals

  1. Load in phyloseq data with rooted tree.
  2. Evaluate sequencing depth and remove sample.
  3. Normalize the read counts between samples.
  4. Calculate community dissimilarities. Numbers between 0 and 1. If 0, completely similar versus if they are 1, then they are completely dissimilar.
    1. Sorensen: Presence/ Absence. Weighted by number of shared taxa. Shared species as a binary-valye. Abundance-unweighted.
    2. Bray-Curtis: Relative abundance. Weighted by number of shared taxa. Shared abundant species: abundance weighted.
    3. (Abundance) Weighted UNIFRAC: Consider abundant species and where they fall on the tree.
  5. Visualize the community data with two unconstricted ordinations:
    1. PCoA: Linear method. Uses matrix algebra to calculate eigenvalye. Calculate how much variation is explained by each axis. Can choose to view axis 1, 2, 3, etc. and plot them together.
    2. NMDS: Non-linear method. Collapse multiple axes into two (or three) dimensions. Can see more axes of variation into fewer axes. Always need to report a stress value. (Ideally less than 0.15)
  6. Run statistics with PERMANOVA and betadispR.

Setup

Load Libraries

# Install packages if needed

pacman::p_load(tidyverse, devtools, phyloseq, patchwork, vegan, ggpubr, rstatix,
               ggplot2,install = FALSE)

Load in colors

host_species_colors <- c(
  "Acanthurus nigros" = "dodgerblue4",
  "Acanthurus achilles" = "#FF5733",
  "Acanthurus nigrofuscus" = "olivedrab",
  "Acanthurus olivaceus" = "purple4",
  "Acanthurus triostegus" = "maroon2",
  "Ctenochaetus striatus" = "yellow2",
  "Naso brevirostris" = "red2",
  "Naso lituratus" = "royalblue",
  "Naso tonganus" = "salmon2",
  "Naso unicornis" = "darkgoldenrod4",
  "Odax pullus" = "springgreen2",
  "Zebrasoma flavescens" = "mediumorchid",
  "Zebrasoma scopas" = "magenta",
  "Zebrasoma velifer" = "goldenrod")

Load in data

# Load in rooted phylogenetic tree!
load("data/03_Phylogenetic_Tree/phytree_preprocessed_physeq.RData")
unrooted_physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 30832 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 30832 taxa by 9 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 30832 tips and 30830 internal nodes ]
midroot_physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 30832 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 17 sample variables ]
## tax_table()   Taxonomy Table:    [ 30832 taxa by 9 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 30832 tips and 30831 internal nodes ]

Explore Read Counts

Raw Read Depth

# Sequence depth will inform us on how we want to normalize our data
# Calculate the total number of reads per sample
raw_total_seqs_df <-
  unrooted_physeq %>%
  # Calculate the sample read sums
  sample_sums() %>%
  data.frame()

# Name the column
colnames(raw_total_seqs_df)[1] <- "TotalSeqs"

head(raw_total_seqs_df)
##     TotalSeqs
## A16     81206
## A17     58967
## A18     50495
## AA1     87794
## AA2     69765
## AA3     63001
# Make a histogram of raw reads
raw_seqs_histogram <-
  raw_total_seqs_df %>%
  ggplot(aes(x = TotalSeqs)) +
  geom_histogram(bins = 50) +
  scale_x_continuous(limits = c(0, 50000)) +
  labs(title = "Raw Sequencing Depth Distribution") + 
  theme_bw()

Remove lowly sequenced sample

Normalize Read Counts

### scale_reads function and matround function
#################################################################################### 
# Function to scale reads: http://deneflab.github.io/MicrobeMiseq/ 
# Scales reads by 
# 1) taking proportions
# 2) multiplying by a given library size of n
# 3) rounding 
# Default for n is the minimum sample size in your library
# Default for round is floor

matround <- function(x){trunc(x+0.5)}

scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
  
  # transform counts to n
  physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
  
  # Pick the rounding functions
  if (round == "floor"){
    otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
  } else if (round == "round"){
    otu_table(physeq.scale) <- round(otu_table(physeq.scale))
  } else if (round == "matround"){
    otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
  }
  
  # Prune taxa and return new phyloseq object
  physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
  return(physeq.scale)
  
  }

Rescale all reads so they all represent the count of the lowest number of sequence reads. We will expect each sample to have # of reads around 2200

This is where one might decide to use rarefaction to normalize the data.

Scale reads and check the distribution of the seq depth

min(sample_sums(midroot_physeq))
## [1] 17528
# Scale reads by the above function
scaled_rooted_physeq <-
  midroot_physeq %>%
  scale_reads(round = "matround")

# Calculate read depth
## Look at total number of sequences in each sample and compare to what we had before

scaled_total_seqs_df <- 
  scaled_rooted_physeq %>%
  sample_sums() %>%
  data.frame()

head(scaled_total_seqs_df)
##         .
## A16 17512
## A17 17575
## A18 17528
## AA1 17515
## AA2 17581
## AA3 17584
# Change first column name to be "TotalSeqs"
colnames(scaled_total_seqs_df)[1] <- "TotalSeqs"

# Inspect
head(scaled_total_seqs_df)
##     TotalSeqs
## A16     17512
## A17     17575
## A18     17528
## AA1     17515
## AA2     17581
## AA3     17584
# Check range of data
min_seqs <-
  min(scaled_total_seqs_df)
max_seqs <-
 max(scaled_total_seqs_df)
# Range of seqs
max_seqs - min_seqs
## [1] 332
# Plot histogram
scaled_total_seqs_df %>%
  ggplot(aes(x = TotalSeqs)) +
  geom_histogram(bins = 50) +
  scale_x_continuous(limits = c(0, 20000)) +
  labs(title = "Scaled Sequencing Depth at 7131") + 
  theme_bw()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).

head(scaled_total_seqs_df)
##     TotalSeqs
## A16     17512
## A17     17575
## A18     17528
## AA1     17515
## AA2     17581
## AA3     17584

Calculate & Visualize Community Dissimiliarity

Exploratory analyses from the Paily & Shankar (2016) paper, which is using unconstrained ordination methods like PCoA.

Sorenson PCoA

# Calculate sorenson dissimularity: Abundance-unweighted of shared taxa
scaled_soren_pcoa <-  
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "bray", binary = TRUE)

#str(scaled_soren_pcoa)

# Plot the ordination 
soren_host_species_pcoa <- plot_ordination(
  physeq = scaled_rooted_physeq,
  ordination = scaled_soren_pcoa,
  color = "host_species",
  title = "Sorensen PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_color_manual(values = host_species_colors) + 
  theme_bw()
# Show the plot 
soren_host_species_pcoa

Bray-Curtis PCoA

# Calculate the BC distance
scaled_BC_pcoa <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "bray")

# Plot the PCoA
bray_host_species_pcoa <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_BC_pcoa,
    color = "host_species",
    title = "Bray-Curtis PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_color_manual(values = c(host_species_colors)) + 
  theme_bw()
bray_host_species_pcoa

Weighted-Unifrac PCoA

# Calculate the BC distance
scaled_wUNI_pcoa <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "wunifrac")

# Plot the PCoA
wUNI_host_species_pcoa <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_pcoa,
    color = "host_species",
    title = "Weighted Unifrac PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_color_manual(values = c(host_species_colors)) + 
  theme_bw()
wUNI_host_species_pcoa

Combine PCoAs

Let’s plot all three together into one plot to have a concise visualization of the three metrics.

(soren_host_species_pcoa + theme(legend.position = "none")) + 
  (bray_host_species_pcoa + theme(legend.position = "none")) + 
    (wUNI_host_species_pcoa + theme(legend.position = "none"))

NMDS

Weighted Unifrac

Since we did 3 of the dissimilarity metrics for the PCoA, let’s just plot one example of them for the NMDS plotting. Here, we will use weighted Unifrac

# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "NMDS",
    distance = "wunifrac")
## Run 0 stress 0.2256408 
## Run 1 stress 0.2323928 
## Run 2 stress 0.2361623 
## Run 3 stress 0.2388469 
## Run 4 stress 0.2362672 
## Run 5 stress 0.2256472 
## ... Procrustes: rmse 0.01574961  max resid 0.0743233 
## Run 6 stress 0.2265927 
## Run 7 stress 0.2296346 
## Run 8 stress 0.2330786 
## Run 9 stress 0.2231072 
## ... New best solution
## ... Procrustes: rmse 0.06198358  max resid 0.2610052 
## Run 10 stress 0.2396078 
## Run 11 stress 0.2364469 
## Run 12 stress 0.2422648 
## Run 13 stress 0.232336 
## Run 14 stress 0.2476483 
## Run 15 stress 0.2262167 
## Run 16 stress 0.2218319 
## ... New best solution
## ... Procrustes: rmse 0.05588632  max resid 0.1929279 
## Run 17 stress 0.2259062 
## Run 18 stress 0.2335861 
## Run 19 stress 0.2344356 
## Run 20 stress 0.2440734 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
# Plot the PCoA
wUNI_host_species_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "host_species") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_color_manual(values = c(host_species_colors)) + 
  theme_bw() + labs(color = "host_species")
wUNI_host_species_nmds

(wUNI_host_species_pcoa + theme(legend.position = "none")) + 
  (wUNI_host_species_nmds + theme(legend.position = "none"))

Statistical Significance Testing

PERMANOVA

# Calculate all three of the distance matrices
scaled_sorensen_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_bray_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_wUnifrac_dist <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")

# make a data frame from the sample_data
# All distance matrices will be the same metadata because they 
# originate from the same phyloseq object. 
metadata <- data.frame(sample_data(scaled_rooted_physeq))

# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space 
# for each of the dissimilarity metrics, we are using a discrete variable 
adonis2(scaled_sorensen_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ host_species, data = metadata)
##          Df SumOfSqs    R2      F Pr(>F)    
## Model    13   11.742 0.377 3.2119  0.001 ***
## Residual 69   19.405 0.623                  
## Total    82   31.147 1.000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ host_species, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    13   15.045 0.46005 4.5223  0.001 ***
## Residual 69   17.658 0.53995                  
## Total    82   32.703 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ host_species, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    13  0.37526 0.46931 4.6938  0.001 ***
## Residual 69  0.42434 0.53069                  
## Total    82  0.79959 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note that:

  • R2 = the percent variation explained.
  • F = the F-Statistic, which represents the importance value.
  • Pr(>F) = the pvalue
# We might also care about other variables
# Here, we will add date and fraction as variables
# multiplicative model ORDER MATTERS! 
adonis2(scaled_sorensen_dist ~ host_species * gut_section, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ host_species * gut_section, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    35   18.268 0.58651 1.9047  0.001 ***
## Residual 47   12.879 0.41349                  
## Total    82   31.147 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist ~ host_species * diet, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ host_species * diet, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    13   15.045 0.46005 4.5223  0.001 ***
## Residual 69   17.658 0.53995                  
## Total    82   32.703 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Note that the ORDER MATTERS!
adonis2(scaled_wUnifrac_dist ~ gut_section * diet, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section * diet, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    12  0.20632 0.25803 2.0286  0.001 ***
## Residual 70  0.59327 0.74197                  
## Total    82  0.79959 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ diet * gut_section, data = metadata)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ diet * gut_section, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model    12  0.20632 0.25803 2.0286  0.001 ***
## Residual 70  0.59327 0.74197                  
## Total    82  0.79959 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can also run tests that include additive (+) or multipliciatve models, which include the interaction term between variables.

BetaDispR

The PERMANOVA is sensitive to variance/dispersion in the data. Therefore, we need to run a homogeneity of dispersion test to test for the sensitivity of our PERMANOVA results to variance.

# Homogeneity of Disperson test with beta dispr
# Sorensen 
beta_soren_host_species <- betadisper(scaled_sorensen_dist, metadata$host_species)
permutest(beta_soren_host_species)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq   Mean Sq     F N.Perm Pr(>F)   
## Groups    13 0.24978 0.0192136 2.968    999  0.004 **
## Residuals 69 0.44668 0.0064736                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bray-curtis 
beta_bray_host_species <- betadisper(scaled_bray_dist, metadata$host_species)
permutest(beta_bray_host_species)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)    
## Groups    13 0.41369 0.031823 3.9614    999  0.001 ***
## Residuals 69 0.55429 0.008033                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Weighted Unifrac 
beta_bray_host_species <- betadisper(scaled_wUnifrac_dist, metadata$host_species)
permutest(beta_bray_host_species)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df   Sum Sq   Mean Sq      F N.Perm Pr(>F)    
## Groups    13 0.027258 0.0020968 4.0778    999  0.001 ***
## Residuals 69 0.035480 0.0005142                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Above, our variance is not impacted by gut section.

Taxonomic Composition

Phylum

# Set the phylum colors
phylum_colors <- c(
  Abditibacteriota = "navy", 
  Acidobacteriota = "darkslategray2", 
  Actinobacteriota = "deeppink1",
  Armatimonadota = "plum2", 
  Bacteroidota = "gold", 
  Bdellovibrionota = "plum1", 
  Calditrichota = "red1",
  Campylobacterota ="black", 
  Chloroflexi = "firebrick",
  Cyanobacteria = "limegreen",
  Dadabacteria = "grey", 
  Deferribacterota ="magenta",
  Deinococcota = "darkgreen",
  Dependentiae = "#3E9B96",
  Desulfobacterota = "greenyellow",
  Elusimicrobiota = "yellow",
  Entotheonellaeota = "#B5D6AA",
  Fibrobacterota = "palevioletred1",
  Firmicutes = "royalblue",
  Fusobacteriota = "darkorange", 
 Gemmatimonadota = "olivedrab",
  Halanaerobiaeota = "green",
  Hydrogenedentes = "darkorchid1",
 Latescibacterota = "khaki2",
 MBNT15 = "gold1",
 Methylomirabilota = "gold4",
 Myxococcota = "hotpink",
 Nitrospinota = "hotpink4",
Nitrospirota = "indianred",
Patescibacteria = "ivory4",
Planctomycetota = "lightgreen",
Proteobacteria = "blueviolet",
"SAR324 clade(Marine group B)" = "brown",
Spirochaetota = "cyan2",
Spirochaetota = "cornflowerblue",
Sva0485 = "aquamarine4",
Synergistota = "palegreen",
Verrucomicrobiota = "plum1",
Zixibacteria = "firebrick4"
)

Plot Phylum Composition

# Goal is to calculate the phylum relative abundance
# Note: the read depth must be normalized in some way: scaled_reads
phylum_df <-
  scaled_rooted_physeq %>%
  # agglomerate at the phylum level
  tax_glom(taxrank = "Phylum") %>%
  # Transform counts to relative abundance
  transform_sample_counts(function (x) {x/sum(x)}) %>%
  # Melt to a long format
  psmelt() %>%
  # Filter out phyla that are less than one percent - get rid of low abundant Phyla
  dplyr::filter(Abundance > 0.01)
  # fix the order of date
  
# Stacked bar plot with all Phyla
# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df %>%
  # Warning: Its important to have one sample per x value,
  # Otherwise, it will take the sum between multiple samples
  ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
  geom_bar(stat = "identity", color = "black") +
  labs(title = "Host Species Phylum Composition") + 
  scale_fill_manual(values = phylum_colors) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))

phylum_df %>% 
  ggplot(aes(x = Sample, y = Abundance, fill = Phylum)) + 
  geom_bar(stat = "identity", color = "black") +
  facet_wrap(~host_species, scales="free_x") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_manual(values = phylum_colors) +
  theme_minimal() +
  labs(title = "Phylum Level Composition")

Genus

# Set the phylum colors
Genus_colors <- c(
  "[Anaerorhadbus] furcosa group" = "darkslategray2",
  Akkermansia = "navy", 
  Alistipes = "deeppink1",
  Bacillus = "plum2", 
  Bacteroides = "gold", 
  Brevinema = "magenta4", 
  Cellulosilyticum = "red1",
  Cetobacterium ="black", 
  "CHKCI002" = "firebrick",
  "Clostridium sensu stricto 1" = "limegreen",
  Desulfovibrio = "grey", 
  DMI ="magenta",
  Endozoicomonas = "greenyellow",
  Enterovibrio = "yellow",
  Epulopiscium = "blue3",
  Erysipelatoclostridium = "palevioletred1",
  Lachnoclostridium = "royalblue",
  Pseudomonas = "greenyellow",
  Ralstonia = "darkgray",
  Romboustia = "violetred",
  Sphingobium = "blueviolet",
  Terrisporobacter = "chartreuse4",
  Thaumasiovibrio = "coral1",
  Treponema = "coral4",
  Turicibacter = "cornflowerblue",
  Tuzzerella = "darkgoldenrod",
  Tyzzerella = "darkorchid",
  "UCG-008" = "deeppink",
  Vibrio = "forestgreen")
genus_epulo_df <- 
  scaled_rooted_physeq %>%
  tax_glom(taxrank = "Genus") %>% #agglomerate by genus 
  transform_sample_counts(function(x) {x/sum(x)}) %>% # transform to relative abundances
  psmelt() %>% #melt pyloseq object into long format df
  dplyr::filter(Genus == "Epulopiscium")

genus_epulo_df %>% 
  ggplot(aes(x = Sample, y = Abundance, fill = Genus)) + 
  geom_bar(stat = "identity", color = "black") +
  facet_wrap(~host_species, scales="free_x") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme_minimal() +
  labs(title = "Relative Abundance Epulopisicum")

Genus_df <-
  scaled_rooted_physeq %>%
  # agglomerate at the phylum level
  tax_glom(taxrank = "Genus") %>%
  # Transform counts to relative abundance
  transform_sample_counts(function (x) {x/sum(x)}) %>%
  # Melt to a long format
  psmelt() %>%
  # Filter out phyla that are less than ten percent - get rid of low abundant Phyla
  dplyr::filter(Abundance > 0.1)

Genus_df %>%
  # Warning: Its important to have one sample per x value,
  # Otherwise, it will take the sum between multiple samples
  ggplot(aes(x = names, 
             y = Abundance, fill = Genus)) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = Genus_colors) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))

Session Information

For Reproducibility

#Ensure reproducibility
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.4.2 (2024-10-31)
##  os       macOS Sequoia 15.2
##  system   x86_64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2025-01-22
##  pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package          * version   date (UTC) lib source
##  abind              1.4-8     2024-09-12 [1] CRAN (R 4.4.1)
##  ade4               1.7-22    2023-02-06 [1] CRAN (R 4.4.0)
##  ape                5.8-1     2024-12-16 [1] CRAN (R 4.4.1)
##  backports          1.5.0     2024-05-23 [1] CRAN (R 4.4.0)
##  Biobase            2.62.0    2023-10-24 [1] Bioconductor
##  BiocGenerics       0.52.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  biomformat         1.34.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  Biostrings         2.74.1    2024-12-16 [1] Bioconductor 3.20 (R 4.4.2)
##  bitops             1.0-9     2024-10-03 [1] CRAN (R 4.4.1)
##  broom              1.0.7     2024-09-26 [1] CRAN (R 4.4.1)
##  bslib              0.8.0     2024-07-29 [1] CRAN (R 4.4.0)
##  cachem             1.1.0     2024-05-16 [1] CRAN (R 4.4.0)
##  car                3.1-3     2024-09-27 [1] CRAN (R 4.4.1)
##  carData            3.0-5     2022-01-06 [1] CRAN (R 4.4.0)
##  cli                3.6.3     2024-06-21 [1] CRAN (R 4.4.0)
##  cluster            2.1.6     2023-12-01 [2] CRAN (R 4.4.2)
##  codetools          0.2-20    2024-03-31 [2] CRAN (R 4.4.2)
##  colorspace         2.1-1     2024-07-26 [1] CRAN (R 4.4.0)
##  crayon             1.5.3     2024-06-20 [1] CRAN (R 4.4.0)
##  data.table         1.16.4    2024-12-06 [1] CRAN (R 4.4.1)
##  devtools         * 2.4.5     2022-10-11 [1] CRAN (R 4.4.0)
##  digest             0.6.37    2024-08-19 [1] CRAN (R 4.4.1)
##  dplyr            * 1.1.4     2023-11-17 [1] CRAN (R 4.4.0)
##  ellipsis           0.3.2     2021-04-29 [1] CRAN (R 4.4.0)
##  evaluate           1.0.1     2024-10-10 [1] CRAN (R 4.4.1)
##  farver             2.1.2     2024-05-13 [1] CRAN (R 4.4.0)
##  fastmap            1.2.0     2024-05-15 [1] CRAN (R 4.4.0)
##  forcats          * 1.0.0     2023-01-29 [1] CRAN (R 4.4.0)
##  foreach            1.5.2     2022-02-02 [1] CRAN (R 4.4.0)
##  Formula            1.2-5     2023-02-24 [1] CRAN (R 4.4.0)
##  fs                 1.6.5     2024-10-30 [1] CRAN (R 4.4.1)
##  generics           0.1.3     2022-07-05 [1] CRAN (R 4.4.0)
##  GenomeInfoDb       1.38.8    2024-03-15 [1] Bioconductor 3.18 (R 4.4.2)
##  GenomeInfoDbData   1.2.11    2025-01-09 [1] Bioconductor
##  ggplot2          * 3.5.1     2024-04-23 [1] CRAN (R 4.4.0)
##  ggpubr           * 0.6.0     2023-02-10 [1] CRAN (R 4.4.0)
##  ggsignif           0.6.4     2022-10-13 [1] CRAN (R 4.4.0)
##  glue               1.8.0     2024-09-30 [1] CRAN (R 4.4.1)
##  gtable             0.3.6     2024-10-25 [1] CRAN (R 4.4.1)
##  hms                1.1.3     2023-03-21 [1] CRAN (R 4.4.0)
##  htmltools          0.5.8.1   2024-04-04 [1] CRAN (R 4.4.0)
##  htmlwidgets        1.6.4     2023-12-06 [1] CRAN (R 4.4.0)
##  httpuv             1.6.15    2024-03-26 [1] CRAN (R 4.4.0)
##  igraph             2.1.2     2024-12-07 [1] CRAN (R 4.4.1)
##  IRanges            2.40.1    2024-12-05 [1] Bioconductor 3.20 (R 4.4.2)
##  iterators          1.0.14    2022-02-05 [1] CRAN (R 4.4.0)
##  jquerylib          0.1.4     2021-04-26 [1] CRAN (R 4.4.0)
##  jsonlite           1.8.9     2024-09-20 [1] CRAN (R 4.4.1)
##  knitr              1.49      2024-11-08 [1] CRAN (R 4.4.1)
##  labeling           0.4.3     2023-08-29 [1] CRAN (R 4.4.0)
##  later              1.4.1     2024-11-27 [1] CRAN (R 4.4.1)
##  lattice          * 0.22-6    2024-03-20 [2] CRAN (R 4.4.2)
##  lifecycle          1.0.4     2023-11-07 [1] CRAN (R 4.4.0)
##  lubridate        * 1.9.4     2024-12-08 [1] CRAN (R 4.4.1)
##  magrittr           2.0.3     2022-03-30 [1] CRAN (R 4.4.0)
##  MASS               7.3-61    2024-06-13 [2] CRAN (R 4.4.2)
##  Matrix             1.7-1     2024-10-18 [2] CRAN (R 4.4.2)
##  memoise            2.0.1     2021-11-26 [1] CRAN (R 4.4.0)
##  mgcv               1.9-1     2023-12-21 [2] CRAN (R 4.4.2)
##  mime               0.12      2021-09-28 [1] CRAN (R 4.4.0)
##  miniUI             0.1.1.1   2018-05-18 [1] CRAN (R 4.4.0)
##  multtest           2.58.0    2023-10-24 [1] Bioconductor
##  munsell            0.5.1     2024-04-01 [1] CRAN (R 4.4.0)
##  nlme               3.1-166   2024-08-14 [2] CRAN (R 4.4.2)
##  pacman             0.5.1     2019-03-11 [1] CRAN (R 4.4.0)
##  patchwork        * 1.3.0     2024-09-16 [1] CRAN (R 4.4.1)
##  permute          * 0.9-7     2022-01-27 [1] CRAN (R 4.4.0)
##  phyloseq         * 1.50.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  pillar             1.10.1    2025-01-07 [1] CRAN (R 4.4.1)
##  pkgbuild           1.4.5     2024-10-28 [1] CRAN (R 4.4.1)
##  pkgconfig          2.0.3     2019-09-22 [1] CRAN (R 4.4.0)
##  pkgload            1.4.0     2024-06-28 [1] CRAN (R 4.4.0)
##  plyr               1.8.9     2023-10-02 [1] CRAN (R 4.4.0)
##  profvis            0.4.0     2024-09-20 [1] CRAN (R 4.4.1)
##  promises           1.3.2     2024-11-28 [1] CRAN (R 4.4.1)
##  purrr            * 1.0.2     2023-08-10 [1] CRAN (R 4.4.0)
##  R6                 2.5.1     2021-08-19 [1] CRAN (R 4.4.0)
##  Rcpp               1.0.13-1  2024-11-02 [1] CRAN (R 4.4.1)
##  RCurl              1.98-1.16 2024-07-11 [1] CRAN (R 4.4.0)
##  readr            * 2.1.5     2024-01-10 [1] CRAN (R 4.4.0)
##  remotes            2.5.0     2024-03-17 [1] CRAN (R 4.4.0)
##  reshape2           1.4.4     2020-04-09 [1] CRAN (R 4.4.0)
##  rhdf5              2.50.1    2024-12-09 [1] Bioconductor 3.20 (R 4.4.2)
##  rhdf5filters       1.18.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  Rhdf5lib           1.28.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  rlang              1.1.4     2024-06-04 [1] CRAN (R 4.4.0)
##  rmarkdown          2.29      2024-11-04 [1] CRAN (R 4.4.1)
##  rstatix          * 0.7.2     2023-02-01 [1] CRAN (R 4.4.0)
##  rstudioapi         0.17.1    2024-10-22 [1] CRAN (R 4.4.1)
##  S4Vectors          0.44.0    2024-10-29 [1] Bioconductor 3.20 (R 4.4.1)
##  sass               0.4.9     2024-03-15 [1] CRAN (R 4.4.0)
##  scales             1.3.0     2023-11-28 [1] CRAN (R 4.4.0)
##  sessioninfo        1.2.2     2021-12-06 [1] CRAN (R 4.4.0)
##  shiny              1.10.0    2024-12-14 [1] CRAN (R 4.4.1)
##  stringi            1.8.4     2024-05-06 [1] CRAN (R 4.4.0)
##  stringr          * 1.5.1     2023-11-14 [1] CRAN (R 4.4.0)
##  survival           3.7-0     2024-06-05 [2] CRAN (R 4.4.2)
##  tibble           * 3.2.1     2023-03-20 [1] CRAN (R 4.4.0)
##  tidyr            * 1.3.1     2024-01-24 [1] CRAN (R 4.4.0)
##  tidyselect         1.2.1     2024-03-11 [1] CRAN (R 4.4.0)
##  tidyverse        * 2.0.0     2023-02-22 [1] CRAN (R 4.4.0)
##  timechange         0.3.0     2024-01-18 [1] CRAN (R 4.4.0)
##  tzdb               0.4.0     2023-05-12 [1] CRAN (R 4.4.0)
##  urlchecker         1.0.1     2021-11-30 [1] CRAN (R 4.4.0)
##  usethis          * 3.1.0     2024-11-26 [1] CRAN (R 4.4.1)
##  vctrs              0.6.5     2023-12-01 [1] CRAN (R 4.4.0)
##  vegan            * 2.6-8     2024-08-28 [1] CRAN (R 4.4.1)
##  withr              3.0.2     2024-10-28 [1] CRAN (R 4.4.1)
##  xfun               0.50      2025-01-07 [1] CRAN (R 4.4.1)
##  xtable             1.8-4     2019-04-21 [1] CRAN (R 4.4.0)
##  XVector            0.42.0    2023-10-24 [1] Bioconductor
##  yaml               2.3.10    2024-07-26 [1] CRAN (R 4.4.0)
##  zlibbioc           1.48.2    2024-03-13 [1] Bioconductor 3.18 (R 4.4.2)
## 
##  [1] /Users/cab565/Library/R/x86_64/4.4/library
##  [2] /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────